rm(list=ls(all=TRUE))
library(zoo)
library(plm)
library(reshape2)
library(data.table)
library(xlsx)
library(xtable)
library(DataCombine)
library(plyr)
library(vioplot)
library(ggplot2)
library(scales)
library(grid)
library(memisc)
setwd("~/Dropbox/LagIdentification/jop_final_version/replication")

#run simulation to replicate Figure A-3
simulation<-function(phi,rho,kappa,beta,delta) {
    # panel properties
    #phi <- 0.5
    #rho <- 0.5
    #kappa <- 0.5
    #beta <- 2
    #delta <- 1
    units<- 100
    time<- 50
    n<-units*time
    id <- seq(1:units)
        
    # merge fes and create panel
    # frame <- data.frame(id, fe)
    panel<-expand.grid(seq(1:time),seq(1:units))
    names(panel)<-c("t","id")
    # total <- merge(frame,panel,by=c("id"), all=T)
    
    # function to generate individual panels of u = phi*u_t-1 + nu, nu~N(0,1)
    # and x = rho*x_t-1 + kappa*u + eta, eta~N(0,1)
    gen.panel<-function(nothing){
        u<-vector(length=time)
        fe<-rnorm(1) # fixed effect
        nu<-rnorm(time) # white noise
        u[1]<-nu[1] # random first value
        for(i in 2:length(u)) {
            u[i]  <- phi*u[i-1] + nu[i]
        }
        x<-vector(length=time)
        eta<-rnorm(time) # white noise
        x[1]<-eta[1] # random first value
        for(i in 2:length(u)) {
            x[i]  <- rho*x[i-1] + kappa*u[i] + fe + eta[i]
        }
        lagx<-vector(length=time)
        lagx[1]<- NA
        for(i in 2:length(u)) {
            lagx[i]  <- x[i-1]
        }
        y<-vector(length=time)
        e<-rnorm(time,0,5) # white noise
        for(i in 1:length(e)) {
            y[i]  <- beta*x[i] + delta*u[i]+ fe + e[i]
        }
        lagy<-vector(length=time)
        lagy[1]<- NA # missing first value
        for(i in 2:length(e)) {
            lagy[i]  <- y[i-1]
        }
        results<-data.frame(cbind(y,lagy,x,lagx,fe,u,e,eta,nu))
        return(results) # drop first value
    }
    
    # create full panels
    dat<-do.call( rbind, replicate(units, gen.panel(1), simplify=FALSE ) )
    ## these functions might also work for x and u...
    #u<-as.vector(replicate(units,arima.sim(n=time,list(ar=phi))))
    #x<-as.vector(replicate(units,arima.sim(n=time,list(ar=rho))))+kappa*u
    
    # generate regression error term e
   #  dat$e<-rnorm(n,0,1)
    
    # create plm object
    # predictors<-data.table(dat,total)
    predictors<-data.table(panel,dat)
    setkeyv(predictors,c('id','t'))
    data<-plm.data(predictors, c("id","t"))
    
    formula1 <- dynformula(y~x, lag.form=list(0,y=1))
    formula2 <- dynformula(y~lagx, lag.form=list(0,y=1))
    
    abond1 <- tryCatch({pgmm(formula1, model="onestep", data=data,
                            gmm.inst=~y+x, lag.gmm=list( c(2,3) ), transformation="ld")}, 
                      error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
    if (class(abond1[1])=="NULL") {
      abond1_beta<- NA
      abond1_t<- NA
      print("There was an error here in abond1")
    } else {
      abond1_beta<- summary(abond1)$coefficients[2,1]
      abond1_t<- summary(abond1)$coefficients[2,3]     
    }
   
    abond2 <- tryCatch({pgmm(formula2, model="onestep", data=data,
                            gmm.inst=~y+x, lag.gmm=list( c(2,3) ), transformation="ld")}, 
                      error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
   if (class(abond2[1])=="NULL") {
     abond2_beta<- NA
     abond2_t<- NA
     print("There was an error here in abond2")
   } else {
     abond2_beta<- summary(abond2)$coefficients[2,1]
     abond2_t<- summary(abond2)$coefficients[2,3]     
   }
   print(abond1_beta)
   print(abond2_beta)
   lagid <- plm(y~lagx, data=data, model="within")
   lagid_beta<- summary(lagid)$coefficients[1,1]
   lagid_t<- summary(lagid)$coefficients[1,3]
   outputs <- cbind(phi,rho,kappa,beta,delta,abond1_beta,abond1_t,abond2_beta,abond2_t,lagid_beta,lagid_t)
   return(outputs)
}


#lag.simulate<-Simulate(
#simulation(phi,rho,kappa,beta,delta),
#expand.grid(
#phi<-c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9),
#rho<- .5, #define the list of values rho takes
#kappa<- .5, #define the list of values kappa takes.
#beta<- 2,
#delta<- 1
#),
#nsim=1,
#trace=10
#)

phi_list<-c(0,seq(.1,.9,.2))
rho_list<-c(.5)
kappa_list<-c(0,.5,2)
beta_list<-c(0,2)
delta_list<-c(1)

nphi <- length(phi_list)
nrho <- length(rho_list)
nkappa <- length(kappa_list)
nbeta <- length(beta_list)
ndelta <- length(delta_list)
numparams<- 12 #set the number of columns in the output (results as defined above) to which results are saved for each iteration
reps <- 50 #set the number of iteration
d <- data.frame(matrix(, nrow = nphi*nrho*nkappa*nbeta*ndelta*reps, ncol = numparams)) #d is the matrix that we save our results to.
i <- 0 #define i so that we can use it to save results to d[i,] for each iteration
output = NULL
for(phi in phi_list) {
    print(phi)
    for(rho in rho_list) {
        for(kappa in kappa_list) {
            print(kappa)
            for(beta in beta_list) {
                for(delta in delta_list) {
                    for(i3 in 1:reps) {
                        i <- i + 1
                        tryCatch({
                          d[i,] <- cbind(i3,simulation(phi,rho,kappa,beta,delta)) #for each iteration i3, which ranges from 0 through 1000, we save the results computed from model(n,rho_y,rho_x).
                        }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
                    }
                }
            }
        }
    }
}

sim_results<-data.frame(d)
names(sim_results) <- c("n","phi","rho","kappa","beta","delta",
                        "abond1_beta","abond1_t","abond2_beta","abond2_t","lagid_beta","lagid_t")
head(sim_results)
#sim_results[complete.cases(sim_results),]

#create a backup
write.table(sim_results, file ="sim_results/sim_results_gmm.txt")


##########################################################################################################################################################################################
#plot
sim_results<-read.table("sim_results/sim_results_gmm.txt", header=TRUE)
sim_results<-sim_results[complete.cases(sim_results),]
attach(sim_results)
sim_results_backup <- sim_results
sim_results <- sim_results_backup

sim_results1 <- sim_results[sim_results$kappa==0 & sim_results$beta==2, ]
sim_results2 <- sim_results[sim_results$kappa==.5 & sim_results$beta==2, ]
sim_results3 <- sim_results[sim_results$kappa==2 & sim_results$beta==2, ]

sim_results7 <- sim_results[sim_results$kappa==0 & sim_results$beta==0, ]
sim_results8 <- sim_results[sim_results$kappa==.5 & sim_results$beta==0, ]
sim_results9 <- sim_results[sim_results$kappa==2 & sim_results$beta==0, ]


sim_results7$abond1_t_sig <- ifelse(sim_results7$abond1_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$abond1_t_sig <- ifelse(sim_results8$abond1_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$abond1_t_sig <- ifelse(sim_results9$abond1_t>=abs(qt(0.025,df=(5000-2))), 1, 0)

sim_results7$abond2_t_sig <- ifelse(sim_results7$abond2_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$abond2_t_sig <- ifelse(sim_results8$abond2_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$abond2_t_sig <- ifelse(sim_results9$abond2_t>=abs(qt(0.025,df=(5000-2))), 1, 0)

sim_results7$lagid_t_sig <- ifelse(sim_results7$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results8$lagid_t_sig <- ifelse(sim_results8$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)
sim_results9$lagid_t_sig <- ifelse(sim_results9$lagid_t>=abs(qt(0.025,df=(5000-2))), 1, 0)


ols.abond11 <- c(tapply(sim_results1$abond1_beta, INDEX = sim_results1$phi, mean))#compute the mean of coefficients from the abond1 model
ols.abond21 <- c(tapply(sim_results1$abond2_beta, INDEX = sim_results1$phi, mean))#compute the mean of abond2 estimates
ols.lagid1 <- c(tapply(sim_results1$lagid_beta, INDEX = sim_results1$phi, mean))#compute the mean of coefficients from the lag model

ols.abond12 <- c(tapply(sim_results2$abond1_beta, INDEX = sim_results2$phi, mean))#compute the mean of coefficients from the abond1 model
ols.abond22 <- c(tapply(sim_results2$abond2_beta, INDEX = sim_results2$phi, mean))#compute the mean of abond2 estimates
ols.lagid2 <- c(tapply(sim_results2$lagid_beta, INDEX = sim_results2$phi, mean))#compute the mean of coefficients from the lag model

ols.abond13 <- c(tapply(sim_results3$abond1_beta, INDEX = sim_results3$phi, mean))#compute the mean of coefficients from the abond1 model
ols.abond23 <- c(tapply(sim_results3$abond2_beta, INDEX = sim_results3$phi, mean))#compute the mean of abond2 estimates
ols.lagid3 <- c(tapply(sim_results3$lagid_beta, INDEX = sim_results3$phi, mean))#compute the mean of coefficients from the lag model

ols.abond1.sig7 <- c(tapply(sim_results7$abond1_t_sig, INDEX = sim_results7$phi, mean))
ols.abond2.sig7 <- c(tapply(sim_results7$abond2_t_sig, INDEX = sim_results7$phi, mean))
ols.lagid.sig7  <- c(tapply(sim_results7$lagid_t_sig, INDEX = sim_results7$phi, mean))

ols.abond1.sig8 <- c(tapply(sim_results8$abond1_t_sig, INDEX = sim_results8$phi, mean))
ols.abond2.sig8 <- c(tapply(sim_results8$abond2_t_sig, INDEX = sim_results8$phi, mean))
ols.lagid.sig8  <- c(tapply(sim_results8$lagid_t_sig, INDEX = sim_results8$phi, mean))

ols.abond1.sig9 <- c(tapply(sim_results9$abond1_t_sig, INDEX = sim_results9$phi, mean))
ols.abond2.sig9 <- c(tapply(sim_results9$abond2_t_sig, INDEX = sim_results9$phi, mean))
ols.lagid.sig9  <- c(tapply(sim_results9$lagid_t_sig, INDEX = sim_results9$phi, mean))



ols.abond1.bias1 <- ols.abond11 - 2
ols.abond2.bias1 <- ols.abond21 - 2
ols.lagid.bias1 <- ols.lagid1 - 1

ols.abond1.rmse1 <- (sim_results1$abond1_beta - 2)^2
ols.abond2.rmse1 <- (sim_results1$abond2_beta - 2)^2
ols.lagid.rmse1 <- (sim_results1$lagid_beta - 1)^2
ols.abond1.rmse1 <- sqrt((rowsum(ols.abond1.rmse1,sim_results1$phi))/100)
ols.abond2.rmse1 <- sqrt((rowsum(ols.abond2.rmse1,sim_results1$phi))/100)
ols.lagid.rmse1 <- sqrt((rowsum(ols.lagid.rmse1,sim_results1$phi))/100)
bias1 <- c(ols.abond1.bias1,ols.abond2.bias1,ols.lagid.bias1)
bias1
rmse1 <- c(ols.abond1.rmse1,ols.abond2.rmse1,ols.lagid.rmse1)
rmse1
t1 <- c(ols.abond1.sig7,ols.abond2.sig7,ols.lagid.sig7)
t1

ols.abond1.bias2 <- ols.abond12 - 2 #2 is the true parameter
ols.abond2.bias2 <- ols.abond22 - 2
ols.lagid.bias2 <- ols.lagid2 - 1 #1 is beta*rho where beta is set at 2 and rho at 0.5.

ols.abond1.rmse2 <- (sim_results2$abond1_beta - 2)^2 #2 is the true parameter
ols.abond2.rmse2 <- (sim_results2$abond2_beta - 2)^2
ols.lagid.rmse2 <- (sim_results2$lagid_beta - 1)^2 #1 is beta*rho where beta is set at 2 and rho at 0.5.
ols.abond1.rmse2 <- sqrt((rowsum(ols.abond1.rmse2,sim_results2$phi))/100)
ols.abond2.rmse2 <- sqrt((rowsum(ols.abond2.rmse2,sim_results2$phi))/100)
ols.lagid.rmse2 <- sqrt((rowsum(ols.lagid.rmse2,sim_results2$phi))/100)
bias2 <- c(ols.abond1.bias2,ols.abond2.bias2,ols.lagid.bias2)
bias2
rmse2 <- c(ols.abond1.rmse2,ols.abond2.rmse2,ols.lagid.rmse2)
t2 <- c(ols.abond1.sig8,ols.abond2.sig8,ols.lagid.sig8)
t2

ols.abond1.bias3 <- ols.abond13 - 2 #2 is the true parameter
ols.abond2.bias3 <- ols.abond23 - 2
ols.lagid.bias3 <- ols.lagid3 - 1 #1 is beta*rho where beta is set at 2 and rho at 0.5.
ols.abond1.rmse3 <- (sim_results3$abond1_beta - 2)^2 #2 is the true parameter
ols.abond2.rmse3 <- (sim_results3$abond2_beta - 2)^2
ols.lagid.rmse3 <- (sim_results3$lagid_beta - 1)^2 #1 is beta*rho where beta is set at 2 and rho at 0.5.
ols.abond1.rmse3 <- sqrt((rowsum(ols.abond1.rmse3,sim_results3$phi))/100)
ols.abond2.rmse3 <- sqrt((rowsum(ols.abond2.rmse3,sim_results3$phi))/100)
ols.lagid.rmse3 <- sqrt((rowsum(ols.lagid.rmse3,sim_results3$phi))/100)
bias3 <- c(ols.abond1.bias3,ols.abond2.bias3,ols.lagid.bias3)
bias3
rmse3 <- c(ols.abond1.rmse3,ols.abond2.rmse3,ols.lagid.rmse3)
rmse3
t3 <- c(ols.abond1.sig9,ols.abond2.sig9,ols.lagid.sig9)
t3



model<-(rep(c(1, 2, 3), each=6))

b1 <- bias1[model<4]
m1 <- model
m1 <-factor(m1, labels=c("ABOND1", "ABOND2", "LAGID"))
phi1 <- (rep(c(0,seq(0.1,0.9,by=0.2)), times=3))
data1 <- data.frame(b1,m1,phi1)
data.rmse1 <- data.frame(rmse1,m1,phi1)
data.t1 <- data.frame(t1,m1,phi1)

b2 <- bias2[model<4]
m2 <- model
m2 <-factor(m2, labels=c("ABOND1", "ABOND2", "LAGID"))
phi2 <- (rep(c(0,seq(0.1,0.9,by=0.2)), times=3))
data2 <- data.frame(b2,m2,phi2)
data.rmse2 <- data.frame(rmse2,m2,phi2)
data.t2 <- data.frame(t2,m2,phi2)

b3 <- bias3[model<4]
m3 <- model
m3 <-factor(m3, labels=c("ABOND1", "ABOND2", "LAGID"))
phi3 <- (rep(c(0,seq(0,0.9,by=0.2)), times=3))
data3 <- data.frame(b3,m3,phi3)
data.rmse3 <- data.frame(rmse3,m3,phi3)
data.t3 <- data.frame(t3,m3,phi3)



colours<-c(ABOND1="black", ABOND2="blue", LAGID="red")
linetype<-c(ABOND1="dotted", ABOND2="dashed", LAGID="solid")

#bias in estimated coefficients
fmt <- function(){
    f <- function(x) as.character(round(x,2))
    f
}


d<-ggplot(data1, aes(x=phi1, y=b1, group=m1))
plot1<-d+labs(title=expression(paste("Bias, ",kappa,"=0, ",beta,"=2")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.6,1.5), breaks=seq(from=-.5, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data2, aes(x=phi2, y=b2, group=m2))
plot2<-d+labs(title=expression(paste("Bias, ",kappa,"=.5, ",beta,"=2")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.6,1.5), breaks=seq(from=-.5, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data3, aes(x=phi3, y=b3, group=m3))
plot3<-d+labs(title=expression(paste("Bias, ",kappa,"=2, ",beta,"=2")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Bias", limits=c(-.6,1.5), breaks=seq(from=-.5, to=2, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse1, aes(x=phi1, y=rmse1, group=m1))
plot4<-d+labs(title=expression(paste("RMSE, ",kappa,"=0, ",beta,"=2")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse2, aes(x=phi2, y=rmse2, group=m2))
plot5<-d+labs(title=expression(paste("RMSE, ",kappa,"=.5, ",beta,"=2")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

d<-ggplot(data.rmse3, aes(x=phi3, y=rmse3, group=m3))
plot6<-d+labs(title=expression(paste("RMSE, ",kappa,"=2, ",beta,"=2")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("RMSE", limits=c(0,1), breaks=seq(from=0, to=1.5, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

#t
f<-ggplot(data.t1, aes(x=phi1, y=t1, group=m1))
plot7<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=0, ",beta,"=0")))+geom_line(aes(group=factor(m1), colour=factor(m1), linetype=factor(m1))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

f<-ggplot(data.t2, aes(x=phi2, y=t2, group=m2))
plot8<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=.5, ",beta,"=0")))+geom_line(aes(group=factor(m2), colour=factor(m2), linetype=factor(m2))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()

f<-ggplot(data.t3, aes(x=phi3, y=t3, group=m3))
plot9<-f+labs(title=expression(paste("Type-1 Error, ",kappa,"=2, ",beta,"=0")))+geom_line(aes(group=factor(m3), colour=factor(m3), linetype=factor(m3))) +scale_x_continuous(expression(paste(phi)), limits=c(0,0.9), labels=fmt())+scale_y_continuous("Frequency", limits=c(0,1), breaks=seq(from=0, to=1, by=0.5))+ scale_colour_manual(name="Method", values=colours)+scale_linetype_manual(name="Method", values=linetype)+geom_abline(intercept=0, slope=0, colour="darkgray") + theme_classic()


#### combine plots, one legend
library(ggplot2)
library(gridExtra)

grid_arrange_shared_legend <- function(...) {
  plots <- list(...)
  g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  grid.arrange(
    do.call(arrangeGrob, lapply(plots, function(x)
      x + theme(legend.position="none"))),
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight))
}

grid_arrange_shared_legend(plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8, plot9)
plotted<-recordPlot()
pdf("figures/figA-3.pdf", width=8, height=7)
replayPlot(plotted)
dev.off()
